{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import requests\n",
    "from bs4 import BeautifulSoup\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "outputs": [
    {
     "data": {
      "text/plain": "['https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-on-on-covid-19-for-older-people',\n 'https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/malaria-and-the-covid-19-pandemic',\n 'https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-on-covid-19-and-masks',\n 'https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-on-tobacco-and-covid-19',\n 'https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-on-covid-19-hiv-and-antiretrovirals',\n 'https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-hydroxychloroquine-and-covid-19',\n 'https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-coronaviruses',\n 'https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/violence-against-women-during-covid-19',\n 'https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-on-covid-19-and-breastfeeding',\n 'https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-for-adolescents-and-youth-related-to-covid-19',\n 'https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-on-infection-prevention-and-control-for-health-care-workers-caring-for-patients-with-suspected-or-confirmed-2019-ncov',\n 'https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-on-covid-19-pregnancy-and-childbirth',\n 'https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/be-active-during-covid-19',\n 
'https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-on-mass-gatherings-and-covid-19',\n 'https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-similarities-and-differences-covid-19-and-influenza']"
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Step 2: collect the URL of every Q&A detail page linked from the WHO hub page into pages1.\n",
    "url = \"https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub\"  # hub (index) page\n",
    "\n",
    "# The timeout keeps the cell from hanging forever on a stalled connection, and\n",
    "# raise_for_status() stops an HTTP error page from being silently parsed into an empty list.\n",
    "strhtml1 = requests.get(url, timeout=30)\n",
    "strhtml1.raise_for_status()\n",
    "soup1 = BeautifulSoup(strhtml1.text, 'html.parser')\n",
    "\n",
    "# Anchors of the vertical link list on the hub page.\n",
    "data1 = soup1.select(\"#PageContent_C002_Col01 > div.sf-list-vertical.sf-list-vertical__nodate > a\")\n",
    "\n",
    "# Each href is prefixed with the site host and stripped of spaces.\n",
    "# Anchors without an href are skipped instead of crashing on None.replace(...).\n",
    "pages1 = [\n",
    "    \"https://www.who.int\" + item1.get('href').replace(\" \", \"\")\n",
    "    for item1 in data1\n",
    "    if item1.get('href')\n",
    "]\n",
    "pages1"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-on-on-covid-19-for-older-people\n",
      "https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/malaria-and-the-covid-19-pandemic\n",
      "https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-on-covid-19-and-masks\n",
      "https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-on-tobacco-and-covid-19\n",
      "https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-on-covid-19-hiv-and-antiretrovirals\n",
      "https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-hydroxychloroquine-and-covid-19\n",
      "https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-coronaviruses\n",
      "https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/violence-against-women-during-covid-19\n",
      "https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-on-covid-19-and-breastfeeding\n",
      "https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-for-adolescents-and-youth-related-to-covid-19\n",
      "https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-on-infection-prevention-and-control-for-health-care-workers-caring-for-patients-with-suspected-or-confirmed-2019-ncov\n",
      "https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-on-covid-19-pregnancy-and-childbirth\n",
      "https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/be-active-during-covid-19\n",
      "https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-on-mass-gatherings-and-covid-19\n",
      "https://www.who.int/zh/emergencies/diseases/novel-coronavirus-2019/question-and-answers-hub/q-a-detail/q-a-similarities-and-differences-covid-19-and-influenza\n"
     ]
    }
   ],
   "source": [
    "# Scrape every detail page in pages1 and collect its question/answer pairs.\n",
    "# Questions and answers are appended together, so Q_list[i] and A_list[i]\n",
    "# always come from the same position on the same page.\n",
    "Q_list = []\n",
    "A_list = []\n",
    "for p1 in pages1:\n",
    "    print(p1)  # progress log: one line per page fetched\n",
    "    # Timeout so one stalled page cannot hang the whole crawl; fail loudly on HTTP errors.\n",
    "    response2 = requests.get(p1, timeout=30)\n",
    "    response2.raise_for_status()\n",
    "    strhtml2 = response2.text\n",
    "    soup2 = BeautifulSoup(strhtml2, \"html.parser\")\n",
    "    Q_h = soup2.select(\"#sf-accordion > div > div.sf-accordion__trigger-panel > a\")\n",
    "    A_h = soup2.select(\"#sf-accordion > div> div.sf-accordion__content > p.sf-accordion__summary\")\n",
    "    if len(Q_h) != len(A_h) or not Q_h:\n",
    "        # Unequal counts would shift every later answer against its question if the two\n",
    "        # lists were filled independently; report the page and keep only complete pairs.\n",
    "        print(\"WARNING: found %d questions and %d answers on %s\" % (len(Q_h), len(A_h), p1))\n",
    "    for q, a in zip(Q_h, A_h):\n",
    "        # Questions: remove spaces and CRLF line breaks. Answers: remove newlines.\n",
    "        Q_list.append(q.text.replace(' ', '').replace('\\r\\n', ''))\n",
    "        A_list.append(a.text.replace('\\n', ''))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "outputs": [
    {
     "data": {
      "text/plain": "                                        Question  \\\n0                                   什么是COVID-19？   \n1                                COVID-19是如何传播的？   \n2                                   哪些人有患上重症的风险？   \n3                         COVID-19是否有疫苗、药物或治疗措施？   \n4                                      应该如何预防传染？   \n..                                           ...   \n153  在COVID-19背景下赛事举办方是否可以实施或向运动员/官员/游客推荐额外保障措施？   \n154                              乘坐公共交通前往场馆有何风险？   \n155                         COVID-19和流感病毒有何相似之处？   \n156                        COVID-19病毒和流感病毒有什么不同？   \n157                  针对COVID-19病毒和流感病毒有哪些医疗干预措施？   \n\n                                                Answer  \n0    COVID-19（2019冠状病毒病）是一种由以往从未在人类身上发现过的新冠状病毒所致的疾病...  \n1    人们可从有该病毒的其他人身上染上COVID-19。这种疾病可通过鼻口产生的小飞沫实现人传人，...  \n2    老年人和所有年龄段原先已有疾病（如糖尿病、高血压、心脏病、肺病或癌症）的人似乎往往比其他人更...  \n3    尽管某些西医、传统或家庭疗法可能会让人舒服些并减轻COVID-19轻症的症状，但尚无药物已证...  \n4    为预防传染，可以做五件事。经常用肥皂和水彻底洗手，然后彻底干燥。如果双手没有明显的脏污，而且...  \n..                                                 ...  \n153  在运动员村遵循面向公众的标准预防建议，包括保持身体距离。同时促进：向所有参与者及其随行人员发...  \n154  如果在公共交通工具上无法做到保持身体距离，那么，同一直在使用该交通工具的当地民众面临的持续风...  \n155  首先，COVID-19病毒和流感病毒会导致相似的疾病症状。它们都引起呼吸系统疾病，有的患者无...  \n156  这两种病毒的一个重大差异是传播速度。与COVID-19病毒相比，流感的中位潜伏期（从感染到出...  \n157  中国目前正对多种治疗方法进行临床试验。全球共有20多种疫苗在研发中。目前还没有针对COVID...  \n\n[158 rows x 2 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Question</th>\n      <th>Answer</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>什么是COVID-19？</td>\n      <td>COVID-19（2019冠状病毒病）是一种由以往从未在人类身上发现过的新冠状病毒所致的疾病...</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>COVID-19是如何传播的？</td>\n      <td>人们可从有该病毒的其他人身上染上COVID-19。这种疾病可通过鼻口产生的小飞沫实现人传人，...</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>哪些人有患上重症的风险？</td>\n      <td>老年人和所有年龄段原先已有疾病（如糖尿病、高血压、心脏病、肺病或癌症）的人似乎往往比其他人更...</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>COVID-19是否有疫苗、药物或治疗措施？</td>\n      <td>尽管某些西医、传统或家庭疗法可能会让人舒服些并减轻COVID-19轻症的症状，但尚无药物已证...</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>应该如何预防传染？</td>\n      <td>为预防传染，可以做五件事。经常用肥皂和水彻底洗手，然后彻底干燥。如果双手没有明显的脏污，而且...</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>153</th>\n      <td>在COVID-19背景下赛事举办方是否可以实施或向运动员/官员/游客推荐额外保障措施？</td>\n      <td>在运动员村遵循面向公众的标准预防建议，包括保持身体距离。同时促进：向所有参与者及其随行人员发...</td>\n    </tr>\n    <tr>\n      <th>154</th>\n      <td>乘坐公共交通前往场馆有何风险？</td>\n      <td>如果在公共交通工具上无法做到保持身体距离，那么，同一直在使用该交通工具的当地民众面临的持续风...</td>\n    </tr>\n    <tr>\n      <th>155</th>\n      <td>COVID-19和流感病毒有何相似之处？</td>\n      <td>首先，COVID-19病毒和流感病毒会导致相似的疾病症状。它们都引起呼吸系统疾病，有的患者无...</td>\n    </tr>\n    <tr>\n      <th>156</th>\n      <td>COVID-19病毒和流感病毒有什么不同？</td>\n      <td>这两种病毒的一个重大差异是传播速度。与COVID-19病毒相比，流感的中位潜伏期（从感染到出...</td>\n    </tr>\n    <tr>\n      <th>157</th>\n      <td>针对COVID-19病毒和流感病毒有哪些医疗干预措施？</td>\n      <td>中国目前正对多种治疗方法进行临床试验。全球共有20多种疫苗在研发中。目前还没有针对COVID...</td>\n    </tr>\n  </tbody>\n</table>\n<p>158 
rows × 2 columns</p>\n</div>"
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build a two-column table from the scraped lists: the lists go in as two labelled rows\n",
    "# and the transpose turns them into the 'Question' and 'Answer' columns.\n",
    "QA_df = pd.DataFrame([Q_list, A_list], index=['Question', 'Answer']).T\n",
    "QA_df"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "outputs": [],
   "source": [
    "# Save the WHO question/answer table to disk (the row index is written as the first column).\n",
    "QA_df.to_csv(path_or_buf='WHO_QA.csv')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "outputs": [],
   "source": [
    "# Combine the Q&A table stored in DXY_QA.csv with the WHO table in QA_df.\n",
    "# DataFrame.append was removed in pandas 2.0, so the frames are stacked with pd.concat;\n",
    "# ignore_index=True renumbers the rows 0..n-1, replacing reset_index().drop(columns='index').\n",
    "DXY_df = pd.read_csv('DXY_QA.csv', index_col=0)\n",
    "QA_sum = pd.concat([DXY_df, QA_df], ignore_index=True)\n",
    "QA_sum.to_csv('QA_sum.csv', encoding='utf-8')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "outputs": [
    {
     "data": {
      "text/plain": "                                        Question  \\\n0                                     什么是新型冠状病毒？   \n1                                新型冠状病毒肺炎由什么引起的？   \n2                                    冠状病毒的致病性如何？   \n3                         新型冠状病毒与 SARS 病毒的区别是什么？   \n4                             出现什么症状可能感染了新型冠状病毒？   \n..                                           ...   \n181  在COVID-19背景下赛事举办方是否可以实施或向运动员/官员/游客推荐额外保障措施？   \n182                              乘坐公共交通前往场馆有何风险？   \n183                         COVID-19和流感病毒有何相似之处？   \n184                        COVID-19病毒和流感病毒有什么不同？   \n185                  针对COVID-19病毒和流感病毒有哪些医疗干预措施？   \n\n                                                Answer  \n0    此次流行的冠状病毒为一种新发现的冠状病毒，国际病毒分类委员会命名为 SARS-Cov-2。因...  \n1    由 SARS-Cov-2 冠状病毒引起，WHO 将 SARS-Cov-2 感染导致的疾病命名...  \n2    冠状病毒主要感染成人或较大儿童，引起普通感冒和咽喉炎，某些毒株还可引起成人腹泻。病毒经飞沫传...  \n3    新型冠状病毒与 SARS 病毒、MERS 病毒同属于冠状病毒这个大家族，是「兄弟姐妹」，基因...  \n4    新型冠状病毒感染的一般症状有：发热、乏力、干咳，逐渐出现呼吸困难；部分患者起病症状轻微，甚至...  \n..                                                 ...  \n181  在运动员村遵循面向公众的标准预防建议，包括保持身体距离。同时促进：向所有参与者及其随行人员发...  \n182  如果在公共交通工具上无法做到保持身体距离，那么，同一直在使用该交通工具的当地民众面临的持续风...  \n183  首先，COVID-19病毒和流感病毒会导致相似的疾病症状。它们都引起呼吸系统疾病，有的患者无...  \n184  这两种病毒的一个重大差异是传播速度。与COVID-19病毒相比，流感的中位潜伏期（从感染到出...  \n185  中国目前正对多种治疗方法进行临床试验。全球共有20多种疫苗在研发中。目前还没有针对COVID...  \n\n[186 rows x 2 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Question</th>\n      <th>Answer</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>什么是新型冠状病毒？</td>\n      <td>此次流行的冠状病毒为一种新发现的冠状病毒，国际病毒分类委员会命名为 SARS-Cov-2。因...</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>新型冠状病毒肺炎由什么引起的？</td>\n      <td>由 SARS-Cov-2 冠状病毒引起，WHO 将 SARS-Cov-2 感染导致的疾病命名...</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>冠状病毒的致病性如何？</td>\n      <td>冠状病毒主要感染成人或较大儿童，引起普通感冒和咽喉炎，某些毒株还可引起成人腹泻。病毒经飞沫传...</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>新型冠状病毒与 SARS 病毒的区别是什么？</td>\n      <td>新型冠状病毒与 SARS 病毒、MERS 病毒同属于冠状病毒这个大家族，是「兄弟姐妹」，基因...</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>出现什么症状可能感染了新型冠状病毒？</td>\n      <td>新型冠状病毒感染的一般症状有：发热、乏力、干咳，逐渐出现呼吸困难；部分患者起病症状轻微，甚至...</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>181</th>\n      <td>在COVID-19背景下赛事举办方是否可以实施或向运动员/官员/游客推荐额外保障措施？</td>\n      <td>在运动员村遵循面向公众的标准预防建议，包括保持身体距离。同时促进：向所有参与者及其随行人员发...</td>\n    </tr>\n    <tr>\n      <th>182</th>\n      <td>乘坐公共交通前往场馆有何风险？</td>\n      <td>如果在公共交通工具上无法做到保持身体距离，那么，同一直在使用该交通工具的当地民众面临的持续风...</td>\n    </tr>\n    <tr>\n      <th>183</th>\n      <td>COVID-19和流感病毒有何相似之处？</td>\n      <td>首先，COVID-19病毒和流感病毒会导致相似的疾病症状。它们都引起呼吸系统疾病，有的患者无...</td>\n    </tr>\n    <tr>\n      <th>184</th>\n      <td>COVID-19病毒和流感病毒有什么不同？</td>\n      <td>这两种病毒的一个重大差异是传播速度。与COVID-19病毒相比，流感的中位潜伏期（从感染到出...</td>\n    </tr>\n    <tr>\n      <th>185</th>\n      <td>针对COVID-19病毒和流感病毒有哪些医疗干预措施？</td>\n      <td>中国目前正对多种治疗方法进行临床试验。全球共有20多种疫苗在研发中。目前还没有针对COVID...</td>\n    </tr>\n  
</tbody>\n</table>\n<p>186 rows × 2 columns</p>\n</div>"
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the combined question/answer table.\n",
    "QA_sum"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}